Objectives:
1) Learn the basics of dplyr
-Selecting, Filtering, and Arranging
-Making a series of function with pipes %>%
-Mutate new and existing columns
-Wide and long tables

2) Learn the basics of ggplot2
-Setting up your dataframe for easy plotting
-Plot lines, scatter plots, boxplots
-Compare different subpopulations

Installing and Loading packages

#Comment out if you've already installed them
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("ggplot2")

#Load libraries
library(dplyr)
library(tidyr)
library(ggplot2)

Loading your dataset

setwd("~/Documents/Courses/Bootcamp/bootcamp_2020/bootcamp_2020-master/")
PKdata <- read.csv("simtab.csv")

#Let's take a glimpse at your dataset
head(PKdata)
##   ID PT_BIRTHDATE PT_USUBJID               PT_ADDRESS TIME     IPRED
## 1  1   09/20/1982    XDP5409 7385 San Francisco Drive    0 0.0000000
## 2  1   09/20/1982    XDP5409 7385 San Francisco Drive    1 0.5701100
## 3  1   09/20/1982    XDP5409 7385 San Francisco Drive    2 0.4966000
## 4  1   09/20/1982    XDP5409 7385 San Francisco Drive    4 0.1929500
## 5  1   09/20/1982    XDP5409 7385 San Francisco Drive    8 0.0159030
## 6  1   09/20/1982    XDP5409 7385 San Francisco Drive   12 0.0010842
##          DV AMT HT     BW SEX HIV IWRES CWRES    PRED        RES WRES DOSE
## 1 0.0000000 300 69 78.298   1   0    NA     0      NA  0.0000000    0  300
## 2 0.6239100   0 69 78.298   1   0   -10     0 0.36325  0.0058536    0  300
## 3 0.6373500   0 69 78.298   1   0   -10     0 0.47078  0.0268820    0  300
## 4 0.1857800   0 69 78.298   1   0   -10     0 0.46926  0.0861620    0  300
## 5 0.0149030   0 69 78.298   1   0   -10     0 0.35684 -0.0715120    0  300
## 6 0.0010393   0 69 78.298   1   0   -10     0 0.26497 -0.0797670    0  300
#Alternatively, click on PKdata in your Global Environment toolbar on the right

#Let's open up the data dictionary too, it's always good practice to know your dataset

Understanding your dataset

#Below are a few other ways to get a high-level glance at your data
glimpse(PKdata)
## Observations: 6,300
## Variables: 18
## $ ID           <int> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, …
## $ PT_BIRTHDATE <fct> 09/20/1982, 09/20/1982, 09/20/1982, 09/20/1982, 09/…
## $ PT_USUBJID   <fct> XDP5409, XDP5409, XDP5409, XDP5409, XDP5409, XDP540…
## $ PT_ADDRESS   <fct> 7385 San Francisco Drive, 7385 San Francisco Drive,…
## $ TIME         <int> 0, 1, 2, 4, 8, 12, 24, 0, 1, 2, 4, 8, 12, 24, 0, 1,…
## $ IPRED        <dbl> 0.0000e+00, 5.7011e-01, 4.9660e-01, 1.9295e-01, 1.5…
## $ DV           <dbl> 0.0000e+00, 6.2391e-01, 6.3735e-01, 1.8578e-01, 1.4…
## $ AMT          <int> 300, 0, 0, 0, 0, 0, 0, 300, 0, 0, 0, 0, 0, 0, 300, …
## $ HT           <int> 69, 69, 69, 69, 69, 69, 69, 59, 59, 59, 59, 59, 59,…
## $ BW           <dbl> 78.298, 78.298, 78.298, 78.298, 78.298, 78.298, 78.…
## $ SEX          <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, …
## $ HIV          <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, …
## $ IWRES        <int> NA, -10, -10, -10, -10, -10, -10, NA, -10, -10, -10…
## $ CWRES        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ PRED         <dbl> NA, 0.363250, 0.470780, 0.469260, 0.356840, 0.26497…
## $ RES          <dbl> 0.0000000, 0.0058536, 0.0268820, 0.0861620, -0.0715…
## $ WRES         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ DOSE         <int> 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 3…
summary(PKdata)
##        ID            PT_BIRTHDATE    PT_USUBJID  
##  Min.   :  1.0   09/20/1982:6300   XDP1255:  14  
##  1st Qu.:225.8                     XDP1529:  14  
##  Median :450.5                     XDP1868:  14  
##  Mean   :450.5                     XDP2059:  14  
##  3rd Qu.:675.2                     XDP2093:  14  
##  Max.   :900.0                     XDP2158:  14  
##                                    (Other):6216  
##                     PT_ADDRESS        TIME            IPRED         
##  5511 San Francisco Drive:  21   Min.   : 0.000   Min.   :0.000000  
##  5647 San Francisco Drive:  21   1st Qu.: 1.000   1st Qu.:0.000139  
##  1325 San Francisco Drive:  14   Median : 4.000   Median :0.135285  
##  1499 San Francisco Drive:  14   Mean   : 7.286   Mean   :0.504987  
##  1752 San Francisco Drive:  14   3rd Qu.:12.000   3rd Qu.:0.773098  
##  2202 San Francisco Drive:  14   Max.   :24.000   Max.   :6.612700  
##  (Other)                 :6202                                      
##        DV                AMT               HT              BW       
##  Min.   :0.000000   Min.   :  0.00   Min.   :46.00   Min.   :36.21  
##  1st Qu.:0.000141   1st Qu.:  0.00   1st Qu.:60.00   1st Qu.:54.71  
##  Median :0.135630   Median :  0.00   Median :64.00   Median :59.82  
##  Mean   :0.504856   Mean   : 85.71   Mean   :64.18   Mean   :60.00  
##  3rd Qu.:0.777958   3rd Qu.:  0.00   3rd Qu.:68.00   3rd Qu.:65.43  
##  Max.   :6.361700   Max.   :900.00   Max.   :84.00   Max.   :83.69  
##                                                                     
##       SEX           HIV             IWRES         CWRES        PRED       
##  Min.   :0.0   Min.   :0.0000   Min.   :-10   Min.   :0   Min.   :0.0706  
##  1st Qu.:0.0   1st Qu.:0.0000   1st Qu.:-10   1st Qu.:0   1st Qu.:0.3590  
##  Median :0.5   Median :0.0000   Median :-10   Median :0   Median :0.5854  
##  Mean   :0.5   Mean   :0.1956   Mean   :-10   Mean   :0   Mean   :0.6706  
##  3rd Qu.:1.0   3rd Qu.:0.0000   3rd Qu.:-10   3rd Qu.:0   3rd Qu.:0.9429  
##  Max.   :1.0   Max.   :1.0000   Max.   :-10   Max.   :0   Max.   :1.4176  
##                                 NA's   :900               NA's   :900     
##       RES                  WRES        DOSE    
##  Min.   :-0.5040400   Min.   :0   Min.   :300  
##  1st Qu.:-0.0515205   1st Qu.:0   1st Qu.:300  
##  Median : 0.0000000   Median :0   Median :600  
##  Mean   :-0.0002373   Mean   :0   Mean   :600  
##  3rd Qu.: 0.0461260   3rd Qu.:0   3rd Qu.:900  
##  Max.   : 0.7366200   Max.   :0   Max.   :900  
## 
#Don't know what a function does? Add a question mark to view its documentation
?table

#You can use table to compare counts of different variables
table(PKdata$SEX, PKdata$HIV)
##    
##        0    1
##   0 2541  609
##   1 2527  623
#To add titles
table("SEX" = PKdata$SEX, "HIV" = PKdata$HIV)
##    HIV
## SEX    0    1
##   0 2541  609
##   1 2527  623
#How many females were in each dose level?
table("DOSE" = PKdata$DOSE,"SEX" = PKdata$SEX)
##      SEX
## DOSE     0    1
##   300 1001 1099
##   600 1036 1064
##   900 1113  987

Dplyr Cheatsheet https://raw.githubusercontent.com/rstudio/cheatsheets/master/data-transformation.pdf

Learn the basics of dplyr

Selecting

#You can select specific columns using select()
#Let's de-identify our dataset by selecting the columns we want
deidentified <- select(PKdata, ID, TIME, DV, DOSE, HT, BW, SEX, HIV)
head(deidentified)
##   ID TIME        DV DOSE HT     BW SEX HIV
## 1  1    0 0.0000000  300 69 78.298   1   0
## 2  1    1 0.6239100  300 69 78.298   1   0
## 3  1    2 0.6373500  300 69 78.298   1   0
## 4  1    4 0.1857800  300 69 78.298   1   0
## 5  1    8 0.0149030  300 69 78.298   1   0
## 6  1   12 0.0010393  300 69 78.298   1   0
#You can also do this by "subtracting" the columns you don't want with -
deidentified <- select(PKdata, -PT_BIRTHDATE, -PT_ADDRESS, -PT_USUBJID)
head(deidentified)
##   ID TIME     IPRED        DV AMT HT     BW SEX HIV IWRES CWRES    PRED
## 1  1    0 0.0000000 0.0000000 300 69 78.298   1   0    NA     0      NA
## 2  1    1 0.5701100 0.6239100   0 69 78.298   1   0   -10     0 0.36325
## 3  1    2 0.4966000 0.6373500   0 69 78.298   1   0   -10     0 0.47078
## 4  1    4 0.1929500 0.1857800   0 69 78.298   1   0   -10     0 0.46926
## 5  1    8 0.0159030 0.0149030   0 69 78.298   1   0   -10     0 0.35684
## 6  1   12 0.0010842 0.0010393   0 69 78.298   1   0   -10     0 0.26497
##          RES WRES DOSE
## 1  0.0000000    0  300
## 2  0.0058536    0  300
## 3  0.0268820    0  300
## 4  0.0861620    0  300
## 5 -0.0715120    0  300
## 6 -0.0797670    0  300
#You can also search through your columns with helpers: contains(), starts_with(), ends_with()
What_did_this_do <- select(PKdata, ends_with("V"))
head(What_did_this_do)
##          DV HIV
## 1 0.0000000   0
## 2 0.6239100   0
## 3 0.6373500   0
## 4 0.1857800   0
## 5 0.0149030   0
## 6 0.0010393   0
#Write your answer commented out below:
#removed all columns that end with V

#Can you use helpers to create the same deidentified dataset?
deidentified <- select(PKdata, -starts_with("PT"))
head(deidentified)
##   ID TIME     IPRED        DV AMT HT     BW SEX HIV IWRES CWRES    PRED
## 1  1    0 0.0000000 0.0000000 300 69 78.298   1   0    NA     0      NA
## 2  1    1 0.5701100 0.6239100   0 69 78.298   1   0   -10     0 0.36325
## 3  1    2 0.4966000 0.6373500   0 69 78.298   1   0   -10     0 0.47078
## 4  1    4 0.1929500 0.1857800   0 69 78.298   1   0   -10     0 0.46926
## 5  1    8 0.0159030 0.0149030   0 69 78.298   1   0   -10     0 0.35684
## 6  1   12 0.0010842 0.0010393   0 69 78.298   1   0   -10     0 0.26497
##          RES WRES DOSE
## 1  0.0000000    0  300
## 2  0.0058536    0  300
## 3  0.0268820    0  300
## 4  0.0861620    0  300
## 5 -0.0715120    0  300
## 6 -0.0797670    0  300

Filtering

#You can filter out specific rows using filter()
#For example, selecting all patients who received a specific dose
Dosed_600 <- filter(PKdata, DOSE == 600)
head(Dosed_600)
##    ID PT_BIRTHDATE PT_USUBJID               PT_ADDRESS TIME    IPRED
## 1 301   09/20/1982    XDP5607 1296 San Francisco Drive    0 0.000000
## 2 301   09/20/1982    XDP5607 1296 San Francisco Drive    1 0.856700
## 3 301   09/20/1982    XDP5607 1296 San Francisco Drive    2 0.925840
## 4 301   09/20/1982    XDP5607 1296 San Francisco Drive    4 0.595740
## 5 301   09/20/1982    XDP5607 1296 San Francisco Drive    8 0.164730
## 6 301   09/20/1982    XDP5607 1296 San Francisco Drive   12 0.042732
##         DV AMT HT     BW SEX HIV IWRES CWRES    PRED      RES WRES DOSE
## 1 0.000000 600 70 64.202   1   0    NA     0      NA 0.000000    0  600
## 2 0.864300   0 70 64.202   1   0   -10     0 0.72684 0.027128    0  600
## 3 0.728240   0 70 64.202   1   0   -10     0 0.94255 0.137470    0  600
## 4 0.544750   0 70 64.202   1   0   -10     0 0.94086 0.066440    0  600
## 5 0.144740   0 70 64.202   1   0   -10     0 0.71775 0.019561    0  600
## 6 0.039924   0 70 64.202   1   0   -10     0 0.53471 0.073796    0  600
#You can use any logical function in R to designate your filter criteria
# > greater than, <= greater equal than
# < less than, <= lesser equal than
# ! not
# & and
# | or
#Can you filter for patients with body weight greater or equal than 45kg?
WT.GT.45 <- filter(PKdata, BW >= 45)
head(WT.GT.45)
##   ID PT_BIRTHDATE PT_USUBJID               PT_ADDRESS TIME     IPRED
## 1  1   09/20/1982    XDP5409 7385 San Francisco Drive    0 0.0000000
## 2  1   09/20/1982    XDP5409 7385 San Francisco Drive    1 0.5701100
## 3  1   09/20/1982    XDP5409 7385 San Francisco Drive    2 0.4966000
## 4  1   09/20/1982    XDP5409 7385 San Francisco Drive    4 0.1929500
## 5  1   09/20/1982    XDP5409 7385 San Francisco Drive    8 0.0159030
## 6  1   09/20/1982    XDP5409 7385 San Francisco Drive   12 0.0010842
##          DV AMT HT     BW SEX HIV IWRES CWRES    PRED        RES WRES DOSE
## 1 0.0000000 300 69 78.298   1   0    NA     0      NA  0.0000000    0  300
## 2 0.6239100   0 69 78.298   1   0   -10     0 0.36325  0.0058536    0  300
## 3 0.6373500   0 69 78.298   1   0   -10     0 0.47078  0.0268820    0  300
## 4 0.1857800   0 69 78.298   1   0   -10     0 0.46926  0.0861620    0  300
## 5 0.0149030   0 69 78.298   1   0   -10     0 0.35684 -0.0715120    0  300
## 6 0.0010393   0 69 78.298   1   0   -10     0 0.26497 -0.0797670    0  300
#You can also use & or | to create more complex logical functions
#Let's filter for patients that received a dose greater than or equal to 600 and HIV+
Dosed600up_HIV <- filter(PKdata, DOSE >= 600 & HIV == 1)
head(Dosed600up_HIV)
##    ID PT_BIRTHDATE PT_USUBJID               PT_ADDRESS TIME      IPRED
## 1 315   09/20/1982    XDP4217 7593 San Francisco Drive    0 0.0000e+00
## 2 315   09/20/1982    XDP4217 7593 San Francisco Drive    1 8.6281e-01
## 3 315   09/20/1982    XDP4217 7593 San Francisco Drive    2 5.0481e-01
## 4 315   09/20/1982    XDP4217 7593 San Francisco Drive    4 9.2131e-02
## 5 315   09/20/1982    XDP4217 7593 San Francisco Drive    8 1.8924e-03
## 6 315   09/20/1982    XDP4217 7593 San Francisco Drive   12 3.5118e-05
##           DV AMT HT     BW SEX HIV IWRES CWRES    PRED         RES WRES
## 1 0.0000e+00 600 64 65.279   1   1    NA     0      NA  0.00000000    0
## 2 8.4877e-01   0 64 65.279   1   1   -10     0 0.71913  0.01188800    0
## 3 5.3510e-01   0 64 65.279   1   1   -10     0 0.92031 -0.00044695    0
## 4 8.5472e-02   0 64 65.279   1   1   -10     0 0.88979  0.16336000    0
## 5 1.7576e-03   0 64 65.279   1   1   -10     0 0.63150  0.20880000    0
## 6 2.9666e-05   0 64 65.279   1   1   -10     0 0.43691  0.10928000    0
##   DOSE
## 1  600
## 2  600
## 3  600
## 4  600
## 5  600
## 6  600
#Challenge: Filter for patients who received the lowest or highest dose, and is female with HIV or male
#Remember there are always many ways to code something :)
Challenge <- filter(PKdata, DOSE != 600 & (HIV == 1 | SEX == 1))
head(Challenge)
##   ID PT_BIRTHDATE PT_USUBJID               PT_ADDRESS TIME     IPRED
## 1  1   09/20/1982    XDP5409 7385 San Francisco Drive    0 0.0000000
## 2  1   09/20/1982    XDP5409 7385 San Francisco Drive    1 0.5701100
## 3  1   09/20/1982    XDP5409 7385 San Francisco Drive    2 0.4966000
## 4  1   09/20/1982    XDP5409 7385 San Francisco Drive    4 0.1929500
## 5  1   09/20/1982    XDP5409 7385 San Francisco Drive    8 0.0159030
## 6  1   09/20/1982    XDP5409 7385 San Francisco Drive   12 0.0010842
##          DV AMT HT     BW SEX HIV IWRES CWRES    PRED        RES WRES DOSE
## 1 0.0000000 300 69 78.298   1   0    NA     0      NA  0.0000000    0  300
## 2 0.6239100   0 69 78.298   1   0   -10     0 0.36325  0.0058536    0  300
## 3 0.6373500   0 69 78.298   1   0   -10     0 0.47078  0.0268820    0  300
## 4 0.1857800   0 69 78.298   1   0   -10     0 0.46926  0.0861620    0  300
## 5 0.0149030   0 69 78.298   1   0   -10     0 0.35684 -0.0715120    0  300
## 6 0.0010393   0 69 78.298   1   0   -10     0 0.26497 -0.0797670    0  300

Arranging

#arrange() will reorder rows or columns according to your specifications (default from low to high)
#For example in PK data we will often organize our rows by patient ID, then by time.
PKdata <- arrange(PKdata, ID, TIME)

Putting it all together with Pipes %>%

#Pipes are used to connect the output of one function to the input of another
#Let's deidentify our dataset, filter for 600mg dose, and arrange by id and time all together
Dose_600 <- PKdata %>%
  select( -starts_with("PT")) %>%
  filter(DOSE == 600) %>%
  arrange(ID, TIME)

#Try this one for yourself!
#Let's try filtering for HIV+ or body weight less than 45kg, then...
#we want to see the trough levels of each patient (defined as the 24hr timepoint)  

Trough <-   PKdata %>%
  select( -starts_with("PT")) %>%
  filter(HIV == 1 | BW < 45) %>%
  filter(TIME == 24)

View(Trough)

Mutate

#mutate() can create new columns or alter existing ones
#For example, BMI is often calculated from HT and WT
#BMI = weight(kg)/height(m)^2

PKdata <- PKdata %>%
  mutate(BMI = BW/(HT/39.37)^2) #are the units correct? check the data dictionary!

#Now you try making a new column and converting DV from mg/L into micromoles!
#The drug used here is remdesivir

PKdata<- PKdata %>%
  mutate(Conc_uM = DV/1000/602.6*1000000) #mg/L *1g/1000mg * 1mol/602.6g * 1000000uM/1M

Learn the basics of ggplot2

Our first Plot!!

ggplot2 cheatsheet: https://raw.githubusercontent.com/rstudio/cheatsheets/master/data-visualization-2.1.pdf

ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV)) ##What are we trying to plot with a PK profile?

#Do you like lines better?
ggplot(PKdata) + 
 geom_line(aes(x=TIME, y=DV,group=factor(ID)),alpha = 0.5, linetype=2,size=0.1)

#Let's put them on the same graph with a +

ggplot(PKdata) + 
 geom_line(aes(x=TIME, y=DV,group=factor(ID)),alpha = 0.5, linetype=2,size=0.1) +
  geom_point(aes(x=TIME, y=DV))

Graphing Subpopulations

ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV, color = as.factor(DOSE))) + 
  geom_line(aes(x=TIME, y=DV, group=factor(ID), color = as.factor(DOSE)),alpha = 0.5, linetype=2,size=0.1)

Another way to stratify

#It's still a little too cluttered
ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV, color = as.factor(DOSE))) + 
  geom_line(aes(x=TIME, y=DV, group=factor(ID), color = as.factor(DOSE)),alpha = 0.5, linetype=2,size=0.1) +
  facet_wrap(.~DOSE)

Let’s try some on your own

#Copy and paste from above, and let's plot stratified by HIV status
ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV, color = as.factor(HIV))) +
  geom_line(aes(x=TIME, y=DV, group=factor(ID), color = as.factor(HIV)),alpha = 0.5, linetype=2,size=0.1) +
  facet_wrap(.~DOSE)

Two stratifications

#Now let's try adding 2 stratifications: color by HIV and facet_wrap by SEX~DOSE. Then try the other way around!
ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV, color = as.factor(HIV))) + 
  geom_line(aes(x=TIME, y=DV, group=factor(ID), color = as.factor(HIV)),alpha = 0.5, linetype=2,size=0.1) +
  facet_wrap(SEX~DOSE)

ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV, color = as.factor(SEX))) +
  geom_line(aes(x=TIME, y=DV, group=factor(ID), color = as.factor(SEX)),alpha = 0.5, linetype=2,size=0.1) +
  facet_wrap(HIV~DOSE)

#Challenge: Plot stratified by body weight, one color for above 45kg and another color for below. Make sure to have a legend :)
#Hint: you will need to manipulate your dataframe

PKdata <- PKdata %>%
  mutate(WT_FLAG = ifelse(BW > 45, 1, 0))

ggplot(PKdata) + 
  geom_point(aes(x=TIME, y=DV, color = as.factor(WT_FLAG))) + 
  geom_line(aes(x=TIME, y=DV, group=factor(ID), color = as.factor(WT_FLAG)),alpha = 0.5, linetype=2,size=0.1) +
  facet_wrap(.~DOSE)

Summarise

Dplyr can be used for simple summary statistics

#You can use summarise to calculate means, ranges, standard deviations, quartiles, etc.
Summary_stats <- PKdata %>%
  summarise(c_mean = mean(DV),
            c_stdev = sd(DV),
            BW_mean = mean(BW),
            BW_upper = quantile(BW, probs = 0.975),
            BW_lower = quantile(BW, probs = 0.225))

Summary_stats
##      c_mean   c_stdev  BW_mean BW_upper BW_lower
## 1 0.5048556 0.7323878 59.99685   76.187   53.955

Combined with group_by(), summarise is a force to be reckoned with..

#Let's calculate subpopulation summary statistics
#For example, let's calculate body weight summary statistics by SEX
BW_by_SEX <- PKdata %>%
  group_by(SEX) %>%
  summarise(BW_mean = mean(BW),
            BW_upper = quantile(BW, probs = 0.975),
            BW_lower = quantile(BW, probs = 0.225))

#Let's plot it!
ggplot(BW_by_SEX) +
  geom_point(aes(x=SEX, y=BW_mean)) +
  geom_errorbar((aes(x=SEX, ymin=BW_lower, ymax=BW_upper)))

#An easier way to do it in just ggplot, there are always multiple ways to do the same thing
ggplot(PKdata) +
  geom_boxplot(aes(x=as.factor(SEX), y=BW))

Let’s calculate the median PK profile

#Let's start with calculating median and percentile profiles 
Med <- PKdata %>%
  group_by(TIME) %>%
  summarise(median = median(DV),
            upper = quantile(DV, probs=0.975), #Upper 95th percentile
            lower = quantile(DV, probs=0.225)) #Lower 95th percentile

#Let's plot it
ggplot(Med) +
  geom_line(aes(x=TIME, y=median)) +
  geom_ribbon(aes(x=TIME, ymin=lower, ymax=upper), alpha=0.3)

Let’s compare median PK profiles of subpopulations

#Let's do the same thing but subset by DOSE
DOSE_Med <- PKdata %>%
  group_by(DOSE,TIME) %>%
  summarise(median = median(DV),
            upper = quantile(DV, probs=0.975), #Upper 95th percentile
            lower = quantile(DV, probs=0.225)) #Lower 95th percentile

#Let's plot it
ggplot(DOSE_Med) +
  geom_line(aes(x=TIME, y=median, color = as.factor(DOSE))) +
  geom_ribbon(aes(x=TIME, ymin=lower, ymax=upper, fill = as.factor(DOSE)), alpha=0.3)

Let’s try another one…

#Median PK profiles, subset by HIV

HIV_Med <- PKdata %>%
  group_by(HIV,TIME) %>%
  summarise(median = median(DV),
            upper = quantile(DV, probs=0.975), #Upper 95th percentile
            lower = quantile(DV, probs=0.225)) #Lower 95th percentile

#Let's plot it
ggplot(HIV_Med) +
  geom_line(aes(x=TIME, y=median, color = as.factor(HIV))) +
  geom_ribbon(aes(x=TIME, ymin=lower, ymax=upper, fill = as.factor(HIV)), alpha=0.3)

This doesn’t work super well…the different dosing levels make the stratification cluttered

#We need to subset by DOSE and HIV
#Prepare the dataset yourself

HIV_Med <- PKdata %>%
  group_by(DOSE, HIV,TIME) %>%
  summarise(median = median(DV),
            upper = quantile(DV, probs=0.975), #Upper 95th percentile
            lower = quantile(DV, probs=0.225)) #Lower 95th percentile

#Let's plot it
ggplot(HIV_Med) +
  geom_line(aes(x=TIME, y=median, color = as.factor(HIV))) +
  geom_ribbon(aes(x=TIME, ymin=lower, ymax=upper, fill = as.factor(HIV)), alpha=0.3) +
  facet_wrap(.~DOSE) +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis